"""
配置文件
"""
import os
from pathlib import Path

# Project root directory: the folder containing this configuration module.
BASE_DIR: Path = Path(__file__).parent

# Data directories (raw inputs and processed outputs live under data/).
DATA_DIR: Path = BASE_DIR.joinpath("data")
RAW_DATA_DIR: Path = DATA_DIR.joinpath("raw")
PROCESSED_DATA_DIR: Path = DATA_DIR.joinpath("processed")

# Model directories (FAISS indices are kept under models/faiss_indices).
MODELS_DIR: Path = BASE_DIR.joinpath("models")
FAISS_INDEX_DIR: Path = MODELS_DIR.joinpath("faiss_indices")

# Identifier of the BioBERT model used for embeddings.
EMBEDDING_MODEL_NAME: str = "dmis-lab/biobert-v1.1"

# --- PubMed API configuration ---
# Base URL of the NCBI E-utilities endpoints.
PUBMED_API_BASE: str = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
# Batch size for PubMed requests (presumably records per request -- confirm
# against the fetching code).
PUBMED_BATCH_SIZE: int = 100
# Date range as a pair of "YYYY/MM/DD" strings (presumably start, end).
PUBMED_DATE_RANGE = ("2023/01/01", "2025/12/31")

# --- Vectorization configuration ---
# BioBERT output dimension.
EMBEDDING_DIM: int = 768
# Number of documents returned by retrieval.
TOP_K: int = 10

# --- RAG configuration ---
# Desired summary length.
SUMMARY_LENGTH: str = "3-5 sentences"
# Context length.
CONTEXT_LENGTH: int = 2048
# Presumably the sampling temperature used for generation -- confirm with caller.
TEMPERATURE: float = 0.7
# Presumably the cap on newly generated tokens -- confirm with caller.
MAX_NEW_TOKENS: int = 200

# --- Local LLM configuration (if used) ---
# Fill in the path to a local LLM, e.g. "/path/to/llama-2-7b".
LOCAL_LLM_PATH = None
USE_LOCAL_LLM: bool = False

# --- OpenAI API configuration (fallback option) ---
# Read from the OPENAI_API_KEY environment variable; empty string when unset.
OPENAI_API_KEY: str = os.environ.get("OPENAI_API_KEY", "")
USE_OPENAI: bool = False

# --- Evaluation metrics ---
EVALUATION_METRICS = [
    "precision@10",
    "rouge-l",
    "user_satisfaction",
]
